#!/bin/bash

## Compute the user region distribution for a given date; input is the user_login partition, e.g. /user/hive/warehouse/pass_data.db/user_login/dt=20150601

# Load the invoking user's shell settings. Presumably this is what puts the
# hadoop/hdfs tools on PATH for non-interactive runs -- TODO confirm.
source ~/.bashrc

## Resolve the date to process from the single required argument.
## Anything `date -d` accepts is allowed; it is normalised to YYYYMMDD.
declare cur_date
if [[ $# -ne 1 ]]; then
	echo "usage:sh run_regionMR.sh 20160601	:date is must!"
	exit 1
fi

# Assignment is kept separate from `declare` so the status tested is date's own.
if ! cur_date=$(date +"%Y%m%d" -d "$1"); then
	echo "usage:sh run_regionMR.sh 20160601 	:date is invalid!"
	# A failed parse leaves cur_date empty; stop here rather than continue
	# with paths built from an empty date.
	exit 1
fi
echo "==========sh run_regionMR.sh $cur_date==========="

## Wait for the dt=$cur_date partition of the user_login table to appear on HDFS.
hdfsPath=/user/hive/warehouse/pass_data.db/user_login/dt=$cur_date
declare flag=false
declare tryNum=$((6 * 10)) # retries; at 10 minutes apart this is about 10 hours
while true; do
	# Only the exit status of the listing matters: 0 means the path exists.
	if result=$(hdfs dfs -ls "$hdfsPath"); then
		flag=true
		break
	fi
	tryNum=$((tryNum - 1))
	if [[ $tryNum -lt 0 ]]; then
		break
	fi
	sleep $((1 * 60 * 10)) # sleep 10 minutes between attempts
done

## The partition never appeared: exit instead of running the job.
# flag always holds the non-empty string "true" or "false", so it must be
# compared explicitly; `[[ ! $flag ]]` only tests for an empty string and
# would never be true here.
if [[ $flag != true ]]; then
	echo "$hdfsPath is not exists!"
	exit 1
fi

## Run the MapReduce job: input is $hdfsPath, output goes to /tmp/region/<date>.
# Guard the recursive delete below: with an empty date, $output would be
# /tmp/region/ and the rm would remove the output of every date.
if [[ -z $cur_date ]]; then
	echo "run mr failed! error:cur_date is empty"
	exit 1
fi
output=/tmp/region/$cur_date
# NOTE(review): localFile is not used in the visible part of this script;
# kept in case later code reads it.
localFile=/home/hadoop/udm/region_$cur_date
# Clear any previous output for this date. The exit status is deliberately
# ignored because the directory may simply not exist yet.
hdfs dfs -rm -r "$output"
if ! hadoop jar udm-1.0-jar-with-dependencies.jar com.baidu.udm.mr.UserModel_Region "$hdfsPath" "$output"; then
	echo "run mr failed! error:input=$hdfsPath output=$output"
	exit 1
fi
